Part A
You pick up one die and with it roll:
2 3 2 6 3 5 6 2 6 6 2 6 6 2 3 6 6 6 5 6 6 5 6 6 6 6 6 4 6 3 3 3 6 6 5 6 6
Make a graph of the posterior probability that you have picked up a loaded die as a function of the number of times you have rolled the die.
Show your code…
You can represent the rolls as data<-c(2,3,2,6,3,5,6,2,6,6,2,6,6,2,3,6,6,6,5,6,6,5,6,6,6,6,6,4,6,3,3,3,6,6,5,6,6)
rm(list=ls())
# Probs of loaded and fair die, respectively
probDie <- c(0.01,0.99)
# Likelihoods of number rolled
loaded <- c(0.1,0.1,0.1,0.1,0.1,0.5)
fair <- c(1/6,1/6,1/6,1/6,1/6,1/6)
data<-c(2,3,2,6,3,5,6,2,6,6,2,6,6,2,3,6,6,6,5,6,6,5,6,6,6,6,6,4,6,3,3,3,6,6,5,6,6)
vals <- vector();
titleStr <- ""
for (i in 1:length(data)) {
vals[i] <- probDie[1];
denom <- probDie[1] * loaded[data[i]] + probDie[2] * fair[data[i]];
probDie[1] <- probDie[1] * loaded[data[i]] / denom;
probDie[2] <- probDie[2] * fair[data[i]] / denom;
titleStr <- paste(titleStr,data[i],sep="")
plot(1:i,vals, main=titleStr,ylim=c(0,1),xlim=c(1,length(data)+1))
}
rm(list=ls())
# Probs of loaded and fair die, respectively
probDie <- c(0.01,0.99)
# Likelihoods of number rolled
loaded <- c(0.1,0.1,0.1,0.1,0.1,0.5)
fair <- c(1/6,1/6,1/6,1/6,1/6,1/6)
rollLoadedDie <- function(x) {
rolls <- vector(length=x,mode="double")
for (j in 1:x) {
rolls[j] <- sample(1:6, 1, prob=loaded)
}
return (rolls)
}
data <- rollLoadedDie(100)
vals <- vector();
titleStr <- ""
for (i in 1:length(data)) {
vals[i] <- probDie[1];
denom <- probDie[1] * loaded[data[i]] + probDie[2] * fair[data[i]];
probDie[1] <- probDie[1] * loaded[data[i]] / denom;
probDie[2] <- probDie[2] * fair[data[i]] / denom;
titleStr <- paste(titleStr,data[i],sep="")
plot(1:i,vals, main=titleStr,ylim=c(0,1),xlim=c(1,length(data)+1))
}
vals
## [1] 0.010000000 0.006024096 0.017857143 0.051724138 0.031690141 0.089403974
## [7] 0.227528090 0.469111969 0.726095618 0.888302193 0.959771796 0.934704150
## [13] 0.895713246 0.837487354 0.939247034 0.902686449 0.847691846 0.943492960
## [19] 0.909240629 0.857364708 0.947458744 0.915394751 0.970112490 0.989834958
## [25] 0.983172300 0.972264978 0.990580832 0.984399350 0.994745130 0.991272459
## [31] 0.997073794 0.995132485 0.998372213 0.999456815 0.999095019 0.998492608
## [37] 0.997490202 0.995823991 0.998604111 0.997675681 0.999224025 0.999741208
## [43] 0.999913721 0.999971239 0.999952065 0.999984021 0.999994674 0.999998225
## [49] 0.999997041 0.999999014 0.999999671 0.999999452 0.999999817 0.999999696
## [55] 0.999999493 0.999999154 0.999998591 0.999997651 0.999996085 0.999998695
## [61] 0.999999565 0.999999275 0.999998792 0.999999597 0.999999329 0.999999776
## [67] 0.999999925 0.999999876 0.999999793 0.999999931 0.999999885 0.999999808
## [73] 0.999999680 0.999999467 0.999999822 0.999999941 0.999999980 0.999999967
## [79] 0.999999989 0.999999982 0.999999994 0.999999990 0.999999997 0.999999994
## [85] 0.999999991 0.999999997 0.999999995 0.999999991 0.999999985 0.999999976
## [91] 0.999999960 0.999999933 0.999999978 0.999999993 0.999999998 0.999999999
## [97] 1.000000000 1.000000000 1.000000000 1.000000000
I found that around 54 rolls, I was 99.999% confident that the die was loaded.
Part B
You are consulting for a hospital. They have a diagnostic test for a disease with a known background prevalence of 0.1%.
The test has the following properties: p(positive result | person has disease) = 0.91 p(negative result| person does not have disease) = 0.84
The cost of running the test one time is $1. The test can be repeated for each patient and the results of the test are independent of one another allowing for Bayesian updates. The test always yields a positive or negative result.
The requirement of the hospital is that the test is repeated for each patient until a Bayesian posterior of at least 0.99999 is reached.
Run simulations for a patient with the disease. About how many times on average must the test be repeated to achieve the hospital’s requirements?
rm(list=ls())
# p(diseased), p(not diseased)
priors <- c(0.001,0.999)
# p(has the disease), p(does not have disease)
likelihoodGivenDiseased <- c(0.91,0.09)
likelihoodGivenNotDiseased <- c(0.16,0.84)
getDataFromLikelihood <- function(likelihood, numPoints) {
d <- vector(mode="integer",length=numPoints);
for (i in 1:numPoints) {
if (runif(1) <= likelihood[1]) {
d[i] <- 1;
}
else {
d[i] <- 2;
}
}
return (d)
}
#getDataFromLikelihood(likelihoodGivenDiseased,50)
#getDataFromLikelihood(likelihoodGivenNotDiseased,50)
data <- getDataFromLikelihood(likelihoodGivenDiseased,50)
vals <- vector();
titleStr <- ""
for (i in 1:length(data)) {
vals[i] <- priors[1];
denom <- priors[1] * likelihoodGivenDiseased[data[i]] + priors[2] * likelihoodGivenNotDiseased[data[i]];
priors[1] <- priors[1] * likelihoodGivenDiseased[data[i]] / denom;
priors[2] <- priors[2] * likelihoodGivenNotDiseased[data[i]] / denom;
titleStr <- paste(titleStr,data[i],sep="")
plot(1:i,vals, main=titleStr,ylim=c(0,1),xlim=c(1,length(data)+1))
}
vals
## [1] 0.001000000 0.005660964 0.031364454 0.155520563 0.511580018 0.856263838
## [7] 0.971331530 0.994837409 0.999088413 0.999839600 0.999971794 0.999995041
## [13] 0.999999128 0.999999847 0.999999973 0.999999995 0.999999999 0.999999992
## [19] 0.999999999 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000
## [25] 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000
## [31] 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000
## [37] 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000
## [43] 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000
## [49] 1.000000000 1.000000000
It takes about 21 times to pass a posterior of 0.99999.
rm(list=ls())
# p(diseased), p(not diseased)
priors <- c(0.001,0.999)
# p(has the disease), p(does not have disease)
likelihoodGivenDiseased <- c(0.91,0.09)
likelihoodGivenNotDiseased <- c(0.16,0.84)
getDataFromLikelihood <- function(likelihood, numPoints) {
d <- vector(mode="integer",length=numPoints);
for (i in 1:numPoints) {
if (runif(1) <= likelihood[1]) {
d[i] <- 1;
}
else {
d[i] <- 2;
}
}
return (d)
}
data <- getDataFromLikelihood(likelihoodGivenNotDiseased,50)
vals <- vector();
titleStr <- ""
for (i in 1:length(data)) {
vals[i] <- priors[2];
denom <- priors[1] * likelihoodGivenDiseased[data[i]] + priors[2] * likelihoodGivenNotDiseased[data[i]];
priors[1] <- priors[1] * likelihoodGivenDiseased[data[i]] / denom;
priors[2] <- priors[2] * likelihoodGivenNotDiseased[data[i]] / denom;
titleStr <- paste(titleStr,data[i],sep="")
plot(1:i,vals, main=titleStr,ylim=c(0,1),xlim=c(1,length(data)+1))
}
vals
## [1] 0.9990000 0.9998928 0.9999885 0.9999988 0.9999999 1.0000000 0.9999999
## [8] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
## [15] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
## [22] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
## [29] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
## [36] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
## [43] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
## [50] 1.0000000
It takes 4 times to reach the hospital’s requirement.
Show your work/code/justification for all answers.
rm(list=ls())
# p(diseased), p(not diseased)
priors <- c(0.001,0.999)
# p(has the disease), p(does not have disease)
likelihoodGivenDiseased <- c(0.91,0.09)
likelihoodGivenNotDiseased <- c(0.16,0.84)
getDataFromLikelihood <- function(likelihood, numPoints) {
d <- vector(mode="integer",length=numPoints);
for (i in 1:numPoints) {
if (runif(1) <= likelihood[1]) {
d[i] <- 1;
}
else {
d[i] <- 2;
}
}
return (d)
}
data <- getDataFromLikelihood(likelihoodGivenDiseased,50)
vals <- vector();
titleStr <- ""
for (i in 1:length(data)) {
vals[i] <- priors[1];
denom <- priors[1] * likelihoodGivenDiseased[data[i]] + priors[2] * likelihoodGivenNotDiseased[data[i]];
priors[1] <- priors[1] * likelihoodGivenDiseased[data[i]] / denom;
priors[2] <- priors[2] * likelihoodGivenNotDiseased[data[i]] / denom;
titleStr <- paste(titleStr,data[i],sep="")
plot(1:i,vals, main=titleStr,ylim=c(0,1),xlim=c(1,length(data)+1))
}
vals #12, 14, 17, 17, 19, 12, 19
## [1] 0.0010000000 0.0056609642 0.0006096131 0.0034572952 0.0193497827
## [6] 0.1009000429 0.3896001972 0.7840249082 0.9538032732 0.9915559998
## [11] 0.9985049360 0.9997368073 0.9999537143 0.9999918615 0.9999985691
## [16] 0.9999997484 0.9999976518 0.9999995871 0.9999999274 0.9999999872
## [21] 0.9999999978 0.9999999996 0.9999999999 1.0000000000 1.0000000000
## [26] 1.0000000000 1.0000000000 1.0000000000 1.0000000000 1.0000000000
## [31] 1.0000000000 1.0000000000 1.0000000000 1.0000000000 1.0000000000
## [36] 1.0000000000 1.0000000000 1.0000000000 1.0000000000 1.0000000000
## [41] 1.0000000000 1.0000000000 1.0000000000 1.0000000000 1.0000000000
## [46] 1.0000000000 1.0000000000 1.0000000000 1.0000000000 1.0000000000
mean(12, 14, 17, 17, 19, 12, 19) # 12
## [1] 12
I ran the code multiple times and recorded the number of tests it took to pass 99.999% confidence and averaged those numbers.
Since each test costs $1 and there are 1 million patients, along with the fact that it will take about 12 tests to be over the hospital’s requirement on tests, the hospital should budget for 12 million dollars. This is from the simulation of the patients who have the disease which is more important than the patients who don’t have the disease.